package com.wuman.jreadability;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: classes2.dex */
public class Readability {
    /** Name of the element attribute in which an element's content score is stored as a decimal string. */
    private static final String CONTENT_SCORE = "readabilityContentScore";
    /** Inner HTML of the body captured on the first run of init(boolean); written back before the retry pass. */
    private String mBodyCache;
    /** The jsoup document that is analysed and rewritten in place. */
    private final Document mDocument;
    /** When true, prepDocument() uses #ffffff / #cccccc for text / link colours instead of #000000 / #333333. */
    private boolean mIsDark;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes2.dex */
    /**
     * Holder for the regular expressions used by the readability heuristics.
     *
     * <p>All patterns are compiled once, eagerly, into {@code static final} fields. Class
     * initialisation publishes them safely, so {@link #get(RegEx)} can be called from any
     * thread without synchronisation (the previous lazy initialisation wrote to plain
     * static fields without any memory barrier).
     */
    public static class Patterns {
        private static final String REGEX_KILL_BREAKS = "(<br\\s*\\/?>(\\s|&nbsp;?)*){1,}";
        private static final String REGEX_NORMALIZE = "\\s{2,}";
        private static final String REGEX_REPLACE_BRS = "(?i)(<br[^>]*>[ \n\r\t]*){2,}";
        private static final String REGEX_REPLACE_FONTS = "(?i)<(\\/?)font[^>]*>";

        /** Class/id fragments that mark an element as unlikely to hold article content. */
        private static final Pattern UNLIKELY_CANDIDATES_RE = Pattern.compile(
                "combx|comment|disqus|foot|header|menu|meta|nav|rss|shoutbox|sidebar|sponsor",
                Pattern.CASE_INSENSITIVE);

        /** Class/id fragments checked together with the unlikely-candidate pattern. */
        private static final Pattern OK_MAYBE_ITS_A_CANDIDATE_RE = Pattern.compile(
                "and|article|body|column|main",
                Pattern.CASE_INSENSITIVE);

        /** Class/id fragments that raise an element's class weight. */
        private static final Pattern POSITIVE_RE = Pattern.compile(
                "article|body|content|entry|hentry|page|pagination|post|text",
                Pattern.CASE_INSENSITIVE);

        /** Class/id fragments that lower an element's class weight. */
        private static final Pattern NEGATIVE_RE = Pattern.compile(
                "combx|comment|contact|foot|footer|footnote|link|media|meta|promo|related|scroll|shoutbox|sponsor|tags|widget",
                Pattern.CASE_INSENSITIVE);

        /** Opening tags whose presence inside a div prevents the div from being turned into a p. */
        private static final Pattern DIV_TO_P_ELEMENTS_RE = Pattern.compile(
                "<(a|blockquote|dl|div|img|ol|p|pre|table|ul)",
                Pattern.CASE_INSENSITIVE);

        /** YouTube / Vimeo URLs; accepts both http and https so modern embeds are recognised. */
        private static final Pattern VIDEO_RE = Pattern.compile(
                "https?:\\/\\/(www\\.)?(youtube|vimeo)\\.com",
                Pattern.CASE_INSENSITIVE);

        /** Keys for the patterns served by {@link Patterns#get(RegEx)}. */
        public enum RegEx {
            UNLIKELY_CANDIDATES,
            OK_MAYBE_ITS_A_CANDIDATE,
            POSITIVE,
            NEGATIVE,
            DIV_TO_P_ELEMENTS,
            VIDEO
        }

        private Patterns() {
        }

        /**
         * Returns the compiled, case-insensitive pattern for the given key.
         *
         * @param regEx which pattern to return; must not be null
         * @return the shared {@link Pattern} instance for {@code regEx}, never null
         * @throws NullPointerException if {@code regEx} is null (thrown by the switch, as before)
         * @throws IllegalArgumentException if a key without a pattern is passed; cannot happen
         *         with the constants currently declared
         */
        public static Pattern get(RegEx regEx) {
            switch (regEx) {
                case UNLIKELY_CANDIDATES:
                    return UNLIKELY_CANDIDATES_RE;
                case OK_MAYBE_ITS_A_CANDIDATE:
                    return OK_MAYBE_ITS_A_CANDIDATE_RE;
                case POSITIVE:
                    return POSITIVE_RE;
                case NEGATIVE:
                    return NEGATIVE_RE;
                case DIV_TO_P_ELEMENTS:
                    return DIV_TO_P_ELEMENTS_RE;
                case VIDEO:
                    return VIDEO_RE;
                default:
                    // Fail loudly instead of handing callers a null they immediately dereference.
                    throw new IllegalArgumentException("No pattern registered for " + regEx);
            }
        }
    }

    public Readability(File file, String str, String str2) throws IOException {
        this.mDocument = Jsoup.parse(file, str, str2);
    }

    public Readability(String str) {
        this.mDocument = Jsoup.parse(str);
    }

    public Readability(String str, String str2) {
        this.mDocument = Jsoup.parse(str, str2);
    }

    public Readability(URL url, int i) throws IOException {
        this.mDocument = Jsoup.parse(url, i);
    }

    /**
     * Creates an instance around an already parsed document. The document is stored as-is
     * (no copy), so later processing modifies the caller's object.
     *
     * @param document the jsoup document to work on
     */
    public Readability(Document document) {
        mDocument = document;
    }

    /**
     * Removes every descendant of {@code element} with tag {@code str}. For the tags
     * object, embed and iframe, a descendant is kept when its outer HTML matches the
     * VIDEO pattern.
     *
     * @param element root whose descendants are cleaned
     * @param str     tag name of the descendants to remove
     */
    private static void clean(Element element, String str) {
        final Elements targets = getElementsByTag(element, str);
        final boolean mayHoldVideo = "object".equalsIgnoreCase(str)
                || "embed".equalsIgnoreCase(str)
                || "iframe".equalsIgnoreCase(str);
        for (Element target : targets) {
            // Only embeddable tags get the video exemption; everything else is always dropped.
            if (mayHoldVideo
                    && Patterns.get(Patterns.RegEx.VIDEO).matcher(target.outerHtml()).find()) {
                continue;
            }
            target.remove();
        }
    }

    /**
     * Removes descendants of {@code element} with tag {@code str} that look like
     * non-content. A descendant with a negative class weight is always removed. Otherwise,
     * if {@code getCharCount(descendant, ",")} is below 10, it is kept only when all of the
     * paragraph/image/list/input/embed/link-density checks below hold.
     *
     * @param element root whose descendants are examined
     * @param str     tag name of the descendants to examine
     */
    private void cleanConditionally(Element element, String str) {
        for (Element candidate : getElementsByTag(element, str)) {
            final int weight = getClassWeight(candidate);
            if (weight < 0) {
                candidate.remove();
                continue;
            }
            if (getCharCount(candidate, ",") >= 10) {
                // Comma-rich blocks are left untouched.
                continue;
            }

            final int paragraphs = getElementsByTag(candidate, "p").size();
            final int images = getElementsByTag(candidate, "img").size();
            // The li count is deliberately offset by 100 before it is compared to the p count.
            final int listItems = getElementsByTag(candidate, "li").size() - 100;
            final int inputs = getElementsByTag(candidate, "input").size();

            // Count embeds whose absolute src is not a recognised video URL.
            int nonVideoEmbeds = 0;
            for (Element embed : getElementsByTag(candidate, "embed")) {
                final String src = embed.absUrl("src");
                if (!Patterns.get(Patterns.RegEx.VIDEO).matcher(src).find()) {
                    nonVideoEmbeds++;
                }
            }

            final float linkDensity = getLinkDensity(candidate);
            final int textLength = getInnerText(candidate, true).length();

            // Each line is one necessary condition for keeping the candidate.
            boolean keep = images <= paragraphs;
            keep = keep && (listItems <= paragraphs
                    || "ul".equalsIgnoreCase(str) || "ol".equalsIgnoreCase(str));
            keep = keep && inputs <= Math.floor(paragraphs / 3);
            keep = keep && (textLength >= 25 || (images != 0 && images <= 2));
            keep = keep && (weight >= 25 || linkDensity <= 0.2f);
            keep = keep && (weight <= 25 || linkDensity <= 0.5f);
            keep = keep && (nonVideoEmbeds != 1 || textLength >= 75);
            keep = keep && nonVideoEmbeds <= 1;

            if (!keep) {
                candidate.remove();
            }
        }
    }

    /**
     * Removes h1 to h6 descendants of {@code element} that have a negative class weight
     * or a link density above 0.33.
     *
     * @param element root whose heading descendants are examined
     */
    private static void cleanHeaders(Element element) {
        int level = 1;
        while (level <= 6) {
            final String headingTag = "h" + level;
            for (Element heading : getElementsByTag(element, headingTag)) {
                final boolean negativeWeight = getClassWeight(heading) < 0;
                // Link density is only evaluated when the class weight did not already decide.
                if (negativeWeight || getLinkDensity(heading) > 0.33f) {
                    heading.remove();
                }
            }
            level++;
        }
    }

    /**
     * Recursively strips the {@code style} attribute from {@code element} and all of its
     * descendants, skipping any element whose class attribute is exactly
     * {@code readability-styled}. Does nothing for a null element.
     *
     * @param element root of the subtree to clean; may be null
     */
    private static void cleanStyles(Element element) {
        if (element == null) {
            return;
        }
        final boolean keepStyle = "readability-styled".equals(element.className());
        if (!keepStyle) {
            element.removeAttr("style");
        }
        // Walk the child chain; each recursive call handles the child itself and its subtree.
        Element child = element.children().first();
        while (child != null) {
            cleanStyles(child);
            child = child.nextElementSibling();
        }
    }

    /**
     * Splits the normalised inner text of {@code element} on a separator and returns the
     * number of resulting pieces.
     *
     * <p>The separator is matched literally. It used to be passed to {@code String.split}
     * as a regular expression, so a separator such as {@code "."} or {@code "|"} produced
     * wrong counts. For the default {@code ","} the result is unchanged.
     *
     * <p>Note that this is the number of pieces, not the number of separator occurrences,
     * and that {@code String.split} drops trailing empty pieces.
     *
     * @param element element whose text is examined
     * @param str     separator to split on; null or empty means {@code ","}
     * @return the number of pieces the text splits into
     */
    private static int getCharCount(Element element, String str) {
        final String separator = (str == null || str.length() == 0) ? "," : str;
        return getInnerText(element, true).split(Pattern.quote(separator)).length;
    }

    /**
     * Computes a weight for {@code element} from its class attribute and its id.
     * Each of the two values contributes -25 if it matches the NEGATIVE pattern and
     * +25 if it matches the POSITIVE pattern; an empty value contributes 0.
     *
     * <p>The previous body accumulated into an undeclared variable (a decompiler
     * register name) and therefore did not compile; the accumulation is now explicit.
     *
     * @param element element to weigh
     * @return the sum of the class and id contributions, between -50 and +50
     */
    private static int getClassWeight(Element element) {
        return getAttributeWeight(element.className()) + getAttributeWeight(element.id());
    }

    /**
     * Scores one attribute value against the NEGATIVE and POSITIVE patterns.
     *
     * @param value class or id string; may be null or empty
     * @return -25, 0 or +25 (0 also when both patterns match)
     */
    private static int getAttributeWeight(String value) {
        if (isEmpty(value)) {
            return 0;
        }
        int weight = 0;
        if (Patterns.get(Patterns.RegEx.NEGATIVE).matcher(value).find()) {
            weight -= 25;
        }
        if (Patterns.get(Patterns.RegEx.POSITIVE).matcher(value).find()) {
            weight += 25;
        }
        return weight;
    }

    /**
     * Reads the content score stored in the element's {@code readabilityContentScore}
     * attribute.
     *
     * @param element element to read from
     * @return the parsed score, or 0 when the attribute value is not a valid integer
     */
    private static int getContentScore(Element element) {
        final String stored = element.attr(CONTENT_SCORE);
        int score;
        try {
            score = Integer.parseInt(stored);
        } catch (NumberFormatException notANumber) {
            // Missing or malformed attribute counts as "no score yet".
            score = 0;
        }
        return score;
    }

    /**
     * Returns the descendants of {@code element} with the given tag name, excluding
     * {@code element} itself (jsoup's {@code Element.getElementsByTag} includes the
     * element it is called on when that element matches).
     *
     * <p>The result is built as a fresh list filtered by object identity rather than by
     * calling {@code Elements.remove(Object)} on jsoup's result.
     * NOTE(review): depending on the bundled jsoup release, {@code Elements.remove(Object)}
     * may match by {@code equals()} instead of identity and may also detach the node from
     * the DOM — confirm against the jsoup version in use. The identity filter does not
     * depend on either behaviour and never touches the DOM.
     *
     * @param element root to search under
     * @param str     tag name to look for
     * @return matching descendants in document order, never containing {@code element}
     */
    private static Elements getElementsByTag(Element element, String str) {
        final Elements descendants = new Elements();
        for (Element match : element.getElementsByTag(str)) {
            if (match != element) {
                descendants.add(match);
            }
        }
        return descendants;
    }

    /**
     * Returns the trimmed text of {@code element}.
     *
     * @param element element whose text is read
     * @param z       when true, every run of two or more whitespace characters is
     *                additionally removed from the text
     * @return the trimmed (and optionally whitespace-collapsed) text
     */
    private static String getInnerText(Element element, boolean z) {
        final String text = element.text().trim();
        if (!z) {
            return text;
        }
        return text.replaceAll("\\s{2,}", "");
    }

    /**
     * Computes the fraction of an element's text that sits inside {@code <a>} descendants.
     *
     * <p>Fixes three defects of the previous body: the loop obtained a new iterator on
     * every pass (so it never terminated when links were present), it referenced an
     * undeclared variable, and an element without text caused a division by zero
     * (NaN or Infinity).
     *
     * @param element element to measure
     * @return link text length divided by total text length, or 0 when the element has no text
     */
    private static float getLinkDensity(Element element) {
        final int textLength = getInnerText(element, true).length();
        if (textLength == 0) {
            // No text at all: treat as "no links" rather than producing NaN.
            return 0.0f;
        }
        int linkLength = 0;
        for (Element link : getElementsByTag(element, "a")) {
            linkLength += getInnerText(link, true).length();
        }
        return (float) linkLength / textLength;
    }

    /**
     * Adds {@code i} to the content score stored on {@code element}.
     *
     * @param element element whose score attribute is updated
     * @param i       amount to add; may be negative
     * @return the same element, for chaining
     */
    private static Element incrementContentScore(Element element, int i) {
        final int updated = getContentScore(element) + i;
        element.attr(CONTENT_SCORE, String.valueOf(updated));
        return element;
    }

    /**
     * Runs the extraction and replaces the document body with a wrapper div that holds
     * the title and the extracted article.
     *
     * <p>The body's inner HTML is cached the first time through. If the extracted article
     * has no text and {@code z} is false, the cached body is restored and the whole
     * process is repeated once with {@code z == true}. If it is still empty on that
     * second run, the article element is emptied and used as-is.
     *
     * @param z forwarded to {@link #grabArticle(boolean)}; true on the retry pass
     */
    private void init(boolean z) {
        final Element originalBody = mDocument.body();
        if (originalBody != null && mBodyCache == null) {
            mBodyCache = originalBody.html();
        }
        prepDocument();

        final Element overlay = mDocument.createElement("div");
        final Element innerDiv = mDocument.createElement("div");
        final Element title = getArticleTitle();
        final Element article = grabArticle(z);

        final boolean nothingFound = isEmpty(getInnerText(article, false));
        if (nothingFound && !z) {
            // First attempt failed: restore the untouched body and retry in the other mode.
            mDocument.body().html(mBodyCache);
            init(true);
            return;
        }
        if (nothingFound) {
            article.html("");
        }

        innerDiv.appendChild(title);
        innerDiv.appendChild(article);
        overlay.appendChild(innerDiv);

        final Element body = mDocument.body();
        body.html("");
        body.prependChild(overlay);
    }

    /**
     * Gives {@code element} its starting content score: a tag-dependent base value
     * (div +5; pre/td/blockquote +3; address/ol/ul/dl/dd/dt/li/form -3; h1-h6/th -5;
     * anything else 0) plus the element's class weight.
     *
     * @param element element to initialise
     */
    private static void initializeNode(Element element) {
        final String tag = element.tagName();
        final int base;
        if ("div".equalsIgnoreCase(tag)) {
            base = 5;
        } else if (tagIsAnyOf(tag, "pre", "td", "blockquote")) {
            base = 3;
        } else if (tagIsAnyOf(tag, "address", "ol", "ul", "dl", "dd", "dt", "li", "form")) {
            base = -3;
        } else if (tagIsAnyOf(tag, "h1", "h2", "h3", "h4", "h5", "h6", "th")) {
            base = -5;
        } else {
            base = 0;
        }
        element.attr(CONTENT_SCORE, Integer.toString(base));
        incrementContentScore(element, getClassWeight(element));
    }

    /** Returns true when {@code tag} equals, ignoring case, any of {@code names}. */
    private static boolean tagIsAnyOf(String tag, String... names) {
        for (String name : names) {
            if (name.equalsIgnoreCase(tag)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether a string is null or has length zero. Whitespace-only strings are
     * not considered empty.
     *
     * @param str string to test; may be null
     * @return true for null or {@code ""}
     */
    private static boolean isEmpty(String str) {
        if (str == null) {
            return true;
        }
        return str.isEmpty();
    }

    /**
     * Rewrites the inner HTML of {@code element} so that every run of br tags (with any
     * whitespace or {@code &nbsp;} following them) becomes a single {@code <br />}.
     *
     * @param element element whose inner HTML is rewritten
     */
    private static void killBreaks(Element element) {
        final String original = element.html();
        final String collapsed = original.replaceAll("(<br\\s*\\/?>(\\s|&nbsp;?)*){1,}", "<br />");
        element.html(collapsed);
    }

    /**
     * Post-processes the extracted article in place: strips inline styles, collapses
     * breaks, removes forms/objects/h1 (and a lone h2)/iframes, prunes headers and
     * tables/lists/divs, drops paragraphs that have neither text nor img/embed/object
     * children, and finally removes br tags that directly precede a paragraph.
     *
     * @param element the article container to clean
     */
    private void prepArticle(Element element) {
        cleanStyles(element);
        killBreaks(element);

        for (String tag : new String[] {"form", "object", "h1"}) {
            clean(element, tag);
        }
        // A single h2 is removed; two or more are left for cleanHeaders to judge.
        if (getElementsByTag(element, "h2").size() == 1) {
            clean(element, "h2");
        }
        clean(element, "iframe");
        cleanHeaders(element);

        for (String tag : new String[] {"table", "ul", "div"}) {
            cleanConditionally(element, tag);
        }

        for (Element paragraph : getElementsByTag(element, "p")) {
            final boolean hasMedia = getElementsByTag(paragraph, "img").size() != 0
                    || getElementsByTag(paragraph, "embed").size() != 0
                    || getElementsByTag(paragraph, "object").size() != 0;
            if (hasMedia) {
                continue;
            }
            if (isEmpty(getInnerText(paragraph, false))) {
                paragraph.remove();
            }
        }

        try {
            final String withoutLeadingBreaks = element.html().replaceAll("(?i)<br[^>]*>\\s*<p", "<p");
            element.html(withoutLeadingBreaks);
        } catch (Exception e) {
            dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.", e);
        }
    }

    /**
     * Multiplies the content score stored on {@code element} by {@code f}; the product is
     * truncated to an int by the cast before being stored.
     *
     * @param element element whose score attribute is updated
     * @param f       scale factor
     * @return the same element, for chaining
     */
    private static Element scaleContentScore(Element element, float f) {
        final int scaled = (int) (getContentScore(element) * f);
        element.attr(CONTENT_SCORE, String.valueOf(scaled));
        return element;
    }

    /**
     * Debug hook for a message without an associated throwable; forwards to
     * {@link #dbg(String, Throwable)} with a null throwable.
     *
     * @param str the debug message
     */
    protected void dbg(String str) {
        dbg(str, (Throwable) null);
    }

    /**
     * Debug hook. The body is empty, so messages are discarded here; being protected and
     * non-final, it can be overridden by a subclass.
     *
     * @param str the debug message
     * @param th  the associated throwable, or null
     */
    protected void dbg(String str, Throwable th) {
    }

    /**
     * Builds an {@code <h1>} element containing the document title.
     *
     * <p>The title is inserted as text, not as HTML. {@code Document.title()} returns
     * plain text, so feeding it to {@code html(...)} re-parsed it as markup and a title
     * containing characters such as {@code <} or {@code &} could be mangled or could
     * inject elements into the output.
     *
     * @return a new, unattached h1 element holding the title text
     */
    protected Element getArticleTitle() {
        final Element heading = mDocument.createElement("h1");
        heading.text(mDocument.title());
        return heading;
    }

    /**
     * Locates the element most likely to hold the article and returns a new
     * {@code div#readability-content} containing it together with qualifying siblings,
     * already cleaned by {@code prepArticle}.
     *
     * <p>Steps: (1) unless {@code z} is true, remove elements whose class+id matches the
     * unlikely-candidate pattern and does not match the "ok, maybe" pattern; turn divs
     * without block-level children into paragraphs; (2) score the parent and grandparent
     * of every paragraph with at least 25 characters of text; (3) scale each score by
     * (1 - link density) and pick the highest; (4) collect the winner and those siblings
     * that score well enough or look like prose paragraphs.
     *
     * <p>Changes from the previous body: the paragraph text is held in a declared local
     * (the old code referenced an undeclared variable and did not compile); the
     * unlikely-candidate test now requires the "ok, maybe" pattern NOT to match (it
     * previously required both to match, which removed only elements carrying both kinds
     * of marker and left ordinary sidebars/footers in place); html is protected from
     * removal alongside body; the candidate list is typed; missing parents are tolerated.
     *
     * @param z when true, unlikely candidates are preserved (used on the retry pass)
     * @return a new, unattached div holding the extracted article
     */
    protected Element grabArticle(boolean z) {
        // Pass 1: strip unlikely candidates and normalise text-only divs to paragraphs.
        for (Element node : mDocument.getAllElements()) {
            if (!z) {
                final String classAndId = node.className() + node.id();
                final boolean unlikely =
                        Patterns.get(Patterns.RegEx.UNLIKELY_CANDIDATES).matcher(classAndId).find();
                final boolean maybeCandidate =
                        Patterns.get(Patterns.RegEx.OK_MAYBE_ITS_A_CANDIDATE).matcher(classAndId).find();
                // body and html are never removed: dropping either would leave no document to work on.
                final boolean structural = "body".equalsIgnoreCase(node.tagName())
                        || "html".equalsIgnoreCase(node.tagName());
                if (unlikely && !maybeCandidate && !structural && node.parent() != null) {
                    node.remove();
                    continue;
                }
            }
            if ("div".equalsIgnoreCase(node.tagName())
                    && !Patterns.get(Patterns.RegEx.DIV_TO_P_ELEMENTS).matcher(node.html()).find()) {
                try {
                    node.tagName("p");
                } catch (Exception e) {
                    dbg("Could not alter div to p, probably an IE restriction, reverting back to div.", e);
                }
            }
        }

        // Pass 2: let every sufficiently long paragraph score its parent and grandparent.
        final ArrayList<Element> candidates = new ArrayList<Element>();
        for (Element paragraph : mDocument.getElementsByTag("p")) {
            final Element parent = paragraph.parent();
            if (parent == null) {
                continue;
            }
            final Element grandParent = parent.parent();
            final String innerText = getInnerText(paragraph, true);
            if (innerText.length() < 25) {
                continue;
            }
            if (!parent.hasAttr(CONTENT_SCORE)) {
                initializeNode(parent);
                candidates.add(parent);
            }
            if (grandParent != null && !grandParent.hasAttr(CONTENT_SCORE)) {
                initializeNode(grandParent);
                candidates.add(grandParent);
            }
            // 1 base point + number of comma-separated pieces + up to 3 points for length.
            final int score = (int) (1 + innerText.split(",").length
                    + Math.min(Math.floor(innerText.length() / 100), 3.0d));
            incrementContentScore(parent, score);
            if (grandParent != null) {
                incrementContentScore(grandParent, score / 2);
            }
        }

        // Pass 3: penalise link-heavy candidates and keep the best one.
        Element topCandidate = null;
        for (Element candidate : candidates) {
            scaleContentScore(candidate, 1.0f - getLinkDensity(candidate));
            if (topCandidate == null || getContentScore(candidate) > getContentScore(topCandidate)) {
                topCandidate = candidate;
            }
        }

        // Fallback: no usable candidate (or one without siblings to inspect) -> wrap the whole body.
        if (topCandidate == null
                || "body".equalsIgnoreCase(topCandidate.tagName())
                || topCandidate.parent() == null) {
            topCandidate = mDocument.createElement("div");
            topCandidate.html(mDocument.body().html());
            mDocument.body().html("");
            mDocument.body().appendChild(topCandidate);
            initializeNode(topCandidate);
        }

        // Pass 4: gather the winner plus related siblings into the result container.
        final Element articleContent = mDocument.createElement("div");
        articleContent.attr("id", "readability-content");
        final int siblingThreshold = Math.max(10, (int) (getContentScore(topCandidate) * 0.2f));
        for (Element sibling : topCandidate.parent().children()) {
            boolean include = sibling == topCandidate;
            if (getContentScore(sibling) >= siblingThreshold) {
                include = true;
            }
            if ("p".equalsIgnoreCase(sibling.tagName())) {
                final float linkDensity = getLinkDensity(sibling);
                final String siblingText = getInnerText(sibling, true);
                final int length = siblingText.length();
                final boolean longProse = length > 80 && linkDensity < 0.25f;
                final boolean shortSentence = length < 80 && linkDensity == 0.0f
                        && siblingText.matches(".*\\.( |$).*");
                if (longProse || shortSentence) {
                    include = true;
                }
            }
            if (include) {
                articleContent.appendChild(sibling);
            }
        }

        prepArticle(articleContent);
        return articleContent;
    }

    /**
     * Returns the result of calling {@code html()} on the underlying document.
     *
     * @return the document's HTML as produced by jsoup
     */
    public final String html() {
        final String markup = mDocument.html();
        return markup;
    }

    /**
     * Runs the extraction on the document, starting with the first pass
     * ({@code init(false)}).
     */
    public final void init() {
        final boolean retryPass = false;
        init(retryPass);
    }

    /**
     * Returns the result of calling {@code outerHtml()} on the underlying document.
     *
     * @return the document's outer HTML as produced by jsoup
     */
    public final String outerHtml() {
        final String markup = mDocument.outerHtml();
        return markup;
    }

    /**
     * Prepares the document for extraction: ensures a body element exists, removes all
     * script elements, stylesheet links in the head and style elements, then appends a
     * viewport meta tag, a reset stylesheet and the reading stylesheet to the head.
     *
     * <p>The text colour and link colour in the reading stylesheet depend on
     * {@code mIsDark}. The colour rule was previously emitted as
     * {@code color:X;!important}, which is malformed CSS: the {@code !important} after
     * the semicolon is a separate, invalid declaration and is discarded. It is now
     * emitted as {@code color:X !important} so the flag actually applies.
     */
    protected void prepDocument() {
        if (mDocument.body() == null) {
            mDocument.appendElement("body");
        }

        for (Element script : mDocument.getElementsByTag("script")) {
            script.remove();
        }
        for (Element link : getElementsByTag(mDocument.head(), "link")) {
            if ("stylesheet".equalsIgnoreCase(link.attr("rel"))) {
                link.remove();
            }
        }
        for (Element style : mDocument.getElementsByTag("style")) {
            style.remove();
        }

        final String textColor = mIsDark ? "#ffffff" : "#000000";
        final String linkColor = mIsDark ? "#cccccc" : "#333333";

        mDocument.head().append("<meta name=\"viewport\" content=\"initial-scale = 1.0,maximum-scale = 1.0\" />");
        mDocument.head().append("<style type=\"text/css\">html,body,div,span,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,code,del,dfn,em,img,q,dl,dt,dd,ol,ul,li,fieldset,form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td{margin:0;padding:0;border:0;font-weight:inherit;font-style:inherit;font-family:inherit;vertical-align:baseline;}:focus{outline:none;}h1{font-size:200%;}h2{font-size:180%;}h3{font-size:160%;}h4{font-size:140%;}h5{font-size:120%;}h6,p{font-size:100%;}body{line-height:1.5;}table{border-collapse:separate;border-spacing:0;}caption,th,td{text-align:left;font-weight:normal;}table,td,th{vertical-align:middle;}blockquote:before,blockquote:after,q:before,q:after{content:\"\";}blockquote,q{quotes:\"\" \"\";}a img{border:none;outline:none;}ol{margin-left:2em;}sup{vertical-align:text-top;}sub{vertical-align:text-bottom;}html>body p code{white-space:normal;}* html legend{margin:-18px -8px 16px 0;padding:0;}* html hr,html>body hr{margin:-8px auto 11px;}</style>");
        mDocument.head().append("<style type=\"text/css\">body,div,p,span,h1,h2,h3,h4,h5,h6{color:" + textColor + " !important}body{background:0 0;font-size:16px;font-family:minion-pro-1,minion-pro-2,Palatino,Georgia,\"Times New Roman\",serif;margin:10px}* html hr,html>body hr{margin:1em 0}p{margin-bottom:15px}a:hover,a:link,a:visited{color:" + linkColor + "}#page{width:750px;margin:50px auto}#logo{color:#5f4b32;font-size:4em}#desp{color:#5f4b32;font-size:1.2em;margin:-2.5em 0 2em 1em}#form{margin:2em 0;text-align:right}#form input{font-size:1.5em}#form #url{width:500px}#form #submit{width:50px;border:none;background-color:transparent;cursor:pointer}#footer{overflow:hidden;text-align:right;list-style:none}#footer li{margin-right:-2px;float:right}#footer li a{border-right:1px solid #555;padding:0 .5em}.article{color:#5f4b32;font-size:16px}.article a:link{color:#5f4b32}.article h1,.article h2,.article h3,.article h4,.article h5,.article h6{margin:.5em 0 1.5em;font-weight:700}.article h1{font-size:1.8em}.article h1 a:hover,.article h1 a:link,.article h1 a:visited{text-decoration:none}.article h2{font-size:1.5em}.article h3{font-size:1.3em}.article h4{font-size:1.1em}.article h5{font-size:1em}.article h6{font-size:.8em}.article p{line-height:1.5em;font-size:20px;margin:0 0 1em}.article code,.article img,.article pre{padding:5px;border:1px solid #999;background:#fff;-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px;max-width:90%;margin:.3em 0}.article code,.article pre{margin:1em 0;font-size:14px;font-family:monaco,\"new courier\",courier,sans-serif}.article code *,.article pre *{border:0}.article dl,.article ol,.article ul{line-height:1.5em;font-size:18px;margin:0 0 .8em 2em}</style>");
    }

    /**
     * Selects the colour scheme used by {@code prepDocument()} when it writes the reading
     * stylesheet.
     *
     * @param z true for light text (#ffffff) and links (#cccccc), false for dark ones
     */
    public void setIsDark(boolean z) {
        mIsDark = z;
    }
}
